{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "Za8-Nr5k11fh"
      },
      "source": [
        "##### Copyright 2018 The TensorFlow Authors."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "cellView": "form",
        "id": "Eq10uEbw0E4l"
      },
      "outputs": [],
      "source": [
        "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n",
        "# you may not use this file except in compliance with the License.\n",
        "# You may obtain a copy of the License at\n",
        "#\n",
        "# https://www.apache.org/licenses/LICENSE-2.0\n",
        "#\n",
        "# Unless required by applicable law or agreed to in writing, software\n",
        "# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
        "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
        "# See the License for the specific language governing permissions and\n",
        "# limitations under the License."
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "Nm71sonIiJjH"
      },
      "source": [
        "# Moving average"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "C34f-r1Mhzkj"
      },
      "source": [
        "<table class=\"tfo-notebook-buttons\" align=\"left\">\n",
        "  <td>\n",
        "    <a target=\"_blank\" href=\"https://colab.research.google.com/github/tensorflow/examples/blob/master/courses/udacity_intro_to_tensorflow_for_deep_learning/l08c03_moving_average.ipynb\"><img src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" />Run in Google Colab</a>\n",
        "  </td>\n",
        "  <td>\n",
        "    <a target=\"_blank\" href=\"https://github.com/tensorflow/examples/blob/master/courses/udacity_intro_to_tensorflow_for_deep_learning/l08c03_moving_average.ipynb\"><img src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" />View source on GitHub</a>\n",
        "  </td>\n",
        "</table>"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "vidayERjaO5q"
      },
      "source": [
        "## Setup"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "gqWabzlJ63nL"
      },
      "outputs": [],
      "source": [
        "import numpy as np\n",
        "import matplotlib.pyplot as plt\n",
        "import tensorflow as tf\n",
        "\n",
        "keras = tf.keras"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "sJwA96JU00pW"
      },
      "outputs": [],
      "source": [
        "def plot_series(time, series, format=\"-\", start=0, end=None, label=None):\n",
        "    plt.plot(time[start:end], series[start:end], format, label=label)\n",
        "    plt.xlabel(\"Time\")\n",
        "    plt.ylabel(\"Value\")\n",
        "    if label:\n",
        "        plt.legend(fontsize=14)\n",
        "    plt.grid(True)\n",
        "    \n",
        "def trend(time, slope=0):\n",
        "    return slope * time\n",
        "\n",
        "def seasonal_pattern(season_time):\n",
        "    \"\"\"Just an arbitrary pattern, you can change it if you wish\"\"\"\n",
        "    return np.where(season_time < 0.4,\n",
        "                    np.cos(season_time * 2 * np.pi),\n",
        "                    1 / np.exp(3 * season_time))\n",
        "\n",
        "def seasonality(time, period, amplitude=1, phase=0):\n",
        "    \"\"\"Repeats the same pattern at each period\"\"\"\n",
        "    season_time = ((time + phase) % period) / period\n",
        "    return amplitude * seasonal_pattern(season_time)\n",
        "\n",
        "def white_noise(time, noise_level=1, seed=None):\n",
        "    rnd = np.random.RandomState(seed)\n",
        "    return rnd.randn(len(time)) * noise_level"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "yVo6CcpRaW7u"
      },
      "source": [
        "## Trend and Seasonality"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "BLt-pLiZ0nfB"
      },
      "outputs": [],
      "source": [
        "time = np.arange(4 * 365 + 1)\n",
        "\n",
        "slope = 0.05\n",
        "baseline = 10\n",
        "amplitude = 40\n",
        "series = baseline + trend(time, slope) + seasonality(time, period=365, amplitude=amplitude)\n",
        "\n",
        "noise_level = 5\n",
        "noise = white_noise(time, noise_level, seed=42)\n",
        "\n",
        "series += noise\n",
        "\n",
        "plt.figure(figsize=(10, 6))\n",
        "plot_series(time, series)\n",
        "plt.show()"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "bjD8ncEZbjEW"
      },
      "source": [
        "## Naive Forecast"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "Pj_-uCeYxcAb"
      },
      "outputs": [],
      "source": [
        "split_time = 1000\n",
        "time_train = time[:split_time]\n",
        "x_train = series[:split_time]\n",
        "time_valid = time[split_time:]\n",
        "x_valid = series[split_time:]\n",
        "\n",
        "naive_forecast = series[split_time - 1:-1]\n",
        "\n",
        "plt.figure(figsize=(10, 6))\n",
        "plot_series(time_valid, x_valid, start=0, end=150, label=\"Series\")\n",
        "plot_series(time_valid, naive_forecast, start=1, end=151, label=\"Forecast\")"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "Uh_7244Gsxfx"
      },
      "source": [
        "Now let's compute the mean absolute error between the forecasts and the predictions in the validation period:"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "byNnC7IbsnMZ"
      },
      "outputs": [],
      "source": [
        "keras.metrics.mean_absolute_error(x_valid, naive_forecast).numpy()"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "WGPBC9QttI1u"
      },
      "source": [
        "That's our baseline, now let's try a moving average."
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "MLtZbFoU8OH-"
      },
      "source": [
        "## Moving Average"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "YGz5UsUdf2tV"
      },
      "outputs": [],
      "source": [
        "def moving_average_forecast(series, window_size):\n",
        "  \"\"\"Forecasts the mean of the last few values.\n",
        "     If window_size=1, then this is equivalent to naive forecast\"\"\"\n",
        "  forecast = []\n",
        "  for time in range(len(series) - window_size):\n",
        "    forecast.append(series[time:time + window_size].mean())\n",
        "  return np.array(forecast)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "Le2gNBthBWPN"
      },
      "outputs": [],
      "source": [
        "def moving_average_forecast(series, window_size):\n",
        "  \"\"\"Forecasts the mean of the last few values.\n",
        "     If window_size=1, then this is equivalent to naive forecast\n",
        "     This implementation is *much* faster than the previous one\"\"\"\n",
        "  mov = np.cumsum(series)\n",
        "  mov[window_size:] = mov[window_size:] - mov[:-window_size]\n",
        "  return mov[window_size - 1:-1] / window_size"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "F50zyJGoDNJl"
      },
      "outputs": [],
      "source": [
        "moving_avg = moving_average_forecast(series, 30)[split_time - 30:]\n",
        "\n",
        "plt.figure(figsize=(10, 6))\n",
        "plot_series(time_valid, x_valid, label=\"Series\")\n",
        "plot_series(time_valid, moving_avg, label=\"Moving average (30 days)\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "wG7pTAd7z0e8"
      },
      "outputs": [],
      "source": [
        "keras.metrics.mean_absolute_error(x_valid, moving_avg).numpy()"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "JMYPnJqwz8nS"
      },
      "source": [
        "That's worse than naive forecast! The moving average does not anticipate trend or seasonality, so let's try to remove them by using differencing. Since the seasonality period is 365 days, we will subtract the value at time *t* – 365 from the value at time *t*."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "5pqySF7-rJR4"
      },
      "outputs": [],
      "source": [
        "diff_series = (series[365:] - series[:-365])\n",
        "diff_time = time[365:]\n",
        "\n",
        "plt.figure(figsize=(10, 6))\n",
        "plot_series(diff_time, diff_series, label=\"Series(t) – Series(t–365)\")\n",
        "plt.show()"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "WDNer84g8OIF"
      },
      "source": [
        "Focusing on the validation period:"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "-O21jlnA8OIG"
      },
      "outputs": [],
      "source": [
        "plt.figure(figsize=(10, 6))\n",
        "plot_series(time_valid, diff_series[split_time - 365:], label=\"Series(t) – Series(t–365)\")\n",
        "plt.show()"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "xPlPlS7DskWg"
      },
      "source": [
        "Great, the trend and seasonality seem to be gone, so now we can use the moving average:"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "QmZpz7arsjbb"
      },
      "outputs": [],
      "source": [
        "diff_moving_avg = moving_average_forecast(diff_series, 50)[split_time - 365 - 50:]\n",
        "\n",
        "plt.figure(figsize=(10, 6))\n",
        "plot_series(time_valid, diff_series[split_time - 365:], label=\"Series(t) – Series(t–365)\")\n",
        "plot_series(time_valid, diff_moving_avg, label=\"Moving Average of Diff\")\n",
        "plt.show()"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "Gno9S2lyecnc"
      },
      "source": [
        "Now let's bring back the trend and seasonality by adding the past values from t – 365:"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "Dv6RWFq7TFGB"
      },
      "outputs": [],
      "source": [
        "diff_moving_avg_plus_past = series[split_time - 365:-365] + diff_moving_avg\n",
        "\n",
        "plt.figure(figsize=(10, 6))\n",
        "plot_series(time_valid, x_valid, label=\"Series\")\n",
        "plot_series(time_valid, diff_moving_avg_plus_past, label=\"Forecasts\")\n",
        "plt.show()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "59jmBrwcTFCx"
      },
      "outputs": [],
      "source": [
        "keras.metrics.mean_absolute_error(x_valid, diff_moving_avg_plus_past).numpy()"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "vx9Et1Hkeusl"
      },
      "source": [
        "Better than naive forecast, good. However the forecasts look a bit too random, because we're just adding past values, which were noisy. Let's use a moving averaging on past values to remove some of the noise:"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "K81dtROoTE_r"
      },
      "outputs": [],
      "source": [
        "diff_moving_avg_plus_smooth_past = moving_average_forecast(series[split_time - 370:-359], 11) + diff_moving_avg\n",
        "\n",
        "plt.figure(figsize=(10, 6))\n",
        "plot_series(time_valid, x_valid, label=\"Series\")\n",
        "plot_series(time_valid, diff_moving_avg_plus_smooth_past, label=\"Forecasts\")\n",
        "plt.show()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "iN2MsBxWTE3m"
      },
      "outputs": [],
      "source": [
        "keras.metrics.mean_absolute_error(x_valid, diff_moving_avg_plus_smooth_past).numpy()"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "WKnmJisHcvTW"
      },
      "source": [
        "That's starting to look pretty good! Let's see if we can do better with a Machine Learning model."
      ]
    }
  ],
  "metadata": {
    "colab": {
      "collapsed_sections": [],
      "name": "l08c03_moving_average.ipynb",
      "toc_visible": true
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}
